Exercise: Increasing Cohesion

Here is an excerpt from a Python script that processes a CSV file:

Record = dict[str, Any]

def main() -> None:
  with open("data.csv") as f:
    reader = csv.DictReader(f)
    data: list[Record] = list(reader)

    processed_data: list[Record] = []
    for row in data:
      row_copy = row.copy()
      if row_copy["status"] == "active":
        row_copy["is_active"] = True
      else:
        row_copy["is_active"] = False
      processed_data.append(row_copy)

    with open("processed.csv", "w") as f:
      writer = csv.DictWriter(f, fieldnames=["name", "status", "is_active"])
      writer.writeheader()
      writer.writerows(processed_data)

Why is this code not very cohesive? How can you refactor it to increase cohesion?

Compatible Python Versions: 3.9+


Kostiantyn Ivashchenko

Hello, ArjanCodes Team!
What do you think about this approach?

from pathlib import Path
from typing import List
from dataclasses import dataclass, field

import csv

BASE_DIR = Path(__file__).resolve().parent
CSV_PATH = BASE_DIR / "data.csv"
PROCESSED_CSV_PATH = BASE_DIR / "processed.csv"

@dataclass
class Record:
name: str
status: str
is_active: bool = field(init=False) # Add is_active field

def __post_init__(self):
if self.status == "active":
self.is_active = True
else:
self.is_active = False

class DataHandler:
def __init__(self, csv_path: Path, processed_csv_path: Path):
self.csv_path = csv_path
self.processed_csv_path = processed_csv_path

def read_csv(self) -> List[Record]:
with open(self.csv_path) as f:
reader = csv.DictReader(f)
data: List[Record] = [Record(name=row['name'], status=row['status']) for row in reader]
return data

def write_csv(self, data: List[Record]) -> None:
with open(self.processed_csv_path, "w") as f:
writer = csv.DictWriter(f, fieldnames=["name", "status", "is_active"])
writer.writeheader()
writer.writerows([record.__dict__ for record in data])

def main() -> None:
data_handler = DataHandler(CSV_PATH, PROCESSED_CSV_PATH)
data = data_handler.read_csv()

data_handler.write_csv(data)

# Check results
with open(PROCESSED_CSV_PATH) as f:
reader = csv.DictReader(f)
processed_data = list(reader)
print(f"Data after writing: {processed_data}")

if __name__ == "__main__":
main()

REPLY
Andreas [ArjanCodes Team]

Nice solution, Kostiantyn, I would argue that this solution has increased cohesion, but I think it could be improved.

* The idea of having a class data_handler with methods, in my view, is not necessary since it does not hold any state. In this case, these methods could have been separate functions
* Which Python version are you using? If you are using 3.12 or higher, you do not need to import List, you can use the built-in types list

REPLY
Kostiantyn Ivashchenko

Yes, you are correct. I did go through next lessons already and I saw where the excessive part.
I use python 3.11 still.

REPLY
Andreas [ArjanCodes Team]

Cool!

Yeah, that is what I imagined! :)

REPLY
ABDALLAH EL HIDALI

def read_csv(file_path: str) -> list[Record]:
with open(file_path) as f:
reader = csv.DictReader(f)
return list(reader)

def save_csv(file_path: str, data: list[Record], fieldnames: list[str]=("name", "status", "is_active")) -> None:
with open(file_path, "w") as f:
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(data)

def process_data(data: list[Record]) -> list[Record]:
processed_data: list[Record] = []
for row in data:
row_copy = row.copy()
if row_copy["status"] == "active":
row_copy["is_active"] = True
else:
row_copy["is_active"] = False
processed_data.append(row_copy)
return processed_data

def main_after() -> None:
# read data
data = read_csv("data.csv")

# process data
processed_data = process_data(data)

# save data
save_csv("processed.csv", processed_data)

if __name__ == "__main__":
# main_before()
main_after()

REPLY
Andreas [ArjanCodes Team]

Nice start! I do have some questions about the solution.

It seems like there should be a type or something similar for the Record. But I can't seem to find it. Is it defined above?

The type annotation for save_csv is incorrect for the field names argument. The expected type is a list, but the default value is a tuple of values. I would also be a bit careful with having collections as a default argument because if they are mutable, you can get some unexpected behavior. In this case, using a tuple is okay because it is immutable. But, as a rule of thumb, try to not too use collections as default values.

REPLY
Alexandre Gourmelon

import csv
from typing import Any

Record = dict[str, Any]

def get_data_from_csv(csv_file_name: str) -> list[Record]:
with open(csv_file_name) as csv_file:
reader = csv.DictReader(csv_file)
data: list[Record] = list(reader)
return data

def process_data(data: list[Record]) -> list[Record]:
processed_data: list[Record] = []
for row in data:
row_copy = process_row(row)
processed_data.append(row_copy)

def process_row(row: Record) -> Record:
row_copy = row.copy()
if row_copy["status"] == "active":
row_copy["is_active"] = True
else:
row_copy["is_active"] = False
return row_copy

def write_data_to_csv(data: list[Record], csv_file_name: str, filednames: list[str]) -> None:
with open(csv_file_name) as csv_file:
writer = csv.DictWriter(csv_file, fieldnames=filednames)
writer.writeheader()
writer.writerows(data)

def main() -> None:
data = get_data_from_csv("data.csv")
processed_data = process_data(data)
write_data_to_csv(processed_data, "processed.csv", ["name", "status", "isactive"])

if __name__ == "__main__":
main()

REPLY
Andreas [ArjanCodes Team]

Looks good! Nice work Alexandre

REPLY
Agustin Rodriguez

Hey Arjan and team,
Here is mi solution:
Record = dict[str, Any]

def csv_file_reader(file_path) -> list[Record]:
""" Read data from a path file """
with open(file_path) as f:
reader = csv.DictReader(f)
data: list[Record] = list(reader)
return data

def set_active_status(status: str) -> bool:
""" Set the active status of a record """
if status == "active":
return True
else:
return False
def csv_file_writer(
file_path: str,
data: list[Record] = None,
fieldnames: list[str] = None
) -> None:
""" Write data to a csv file """
with open(file_path, "w") as f:
if fieldnames is None:
fieldnames = []
writer = csv.DictWriter(f, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(data)

def process_data(data: list[Record]) -> list[Record]:
""" Process data """
processed_data : list[Record] = []
for row in data:
row_copy = row.copy()
row_copy["is_active"] = set_active_status(row["status"])
processed_data.append(row_copy)
return processed_data

def main() -> None:
file_read_path = "data.csv"
file_write_path = "processed_data.csv"
fieldnames = ["name", "status", "is_active"]

data = csv_file_reader(file_path=file_read_path)
processed_status = process_data(data)
csv_file_writer(file_path=file_write_path, data=processed_status, fieldnames=fieldnames)

REPLY
Andreas [ArjanCodes Team]

Nice job with the cohesion! There are however some improvements that can be made:

* set_active_status can be shortened or even removed. The conditional logic status == "active" does not need to be wrapper within an if-statement.


def set_active_status(status: str) -> bool:
""" Set the active status of a record """
if status == "active":
return True
else:
return False

Is the same as

def set_active_status(status: str) -> bool:
""" Set the active status of a record """
return status == "active":

* Is there a reason why you want to copy each row instead of doing a copy of the full data list in process_data ? In my view, it is a bit cleaner to do all sorts of copying and alteration of parameters as early as possible. In this case, make a copy of the data so you do not need to think more about it

REPLY
Agustin Rodriguez

Thanks

REPLY
Andreas [ArjanCodes Team]

No worries! If you have any questions or discussion points, please write here or on the discord! :)

REPLY
Scott Blake

import csv
from enum import Enum
from typing import Any

Record = dict[str, Any]

class RecordStatus(Enum):
ACTIVE = "active"
INACTIVE = "inactive"

def read_from_csv_file(file_path: str) -> list[Record]:
with open(file_path) as f:
reader = csv.DictReader(f)
data: list[Record] = list(reader)
return data

def record_is_active(record: Record) -> bool:
return record["status"] == RecordStatus.ACTIVE.value

def process_data(data: list[Record]) -> list[Record]:
processed_data: list[Record] = []
for row in data:
row_copy = row.copy()
row_copy["is_active"] = record_is_active(row_copy)
processed_data.append(row_copy)
return processed_data

def write_to_csv_file(file_path: str, data: list[Record]) -> None:
with open(file_path, "w") as f:
writer = csv.DictWriter(f, fieldnames=["name", "status", "is_active"])
writer.writeheader()
writer.writerows(data)

def main() -> None:
data = read_from_csv_file("data.csv")
processed_data = process_data(data)
write_to_csv_file("processed.csv", processed_data)

if __name__ == "__main__":
main()

REPLY
Andreas [ArjanCodes Team]

Nice job Scott! A nice addition that you added a custom type Record

REPLY
Show More